In this document, we analyze the contextualized sensorimotor norms: judgments about the strength of different sensorimotor dimensions of ambiguous words, in context.
We use these norms in several analyses, described in the sections below.
First, load the data.
# Load the contextualized sensorimotor norms: one row per word-in-context item.
df_contextualized_meanings = read_csv("../../data/processed/contextualized_sensorimotor_norms.csv")
## New names:
## Rows: 448 Columns: 28
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): word, sentence, context dbl (25): ...1, Vision.M, Vision.SD, Hearing.M,
## Hearing.SD, Olfaction.M, Olf...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Sanity check: expect one row per item (448 word/context combinations).
nrow(df_contextualized_meanings)
## [1] 448
# Reshape to long format: one row per item x dimension, with the mean
# strength rating in `Strength`.
df_contextualized_meanings_long = df_contextualized_meanings %>%
  pivot_longer(cols = c(Vision.M, Hearing.M, Olfaction.M,
                        Taste.M, Interoception.M, Touch.M,
                        Mouth_throat.M, Head.M, Torso.M,
                        Hand_arm.M, Foot_leg.M),
               names_to = "Dimension",
               values_to = "Strength") %>%
  # Strip the ".M" suffix to get clean dimension labels. The dot must be
  # escaped and the pattern anchored: an unescaped '.M' is a regex matching
  # ANY character followed by "M", which can mangle other names.
  mutate(Dimension = sub('\\.M$', '', Dimension))
# Distribution of strength ratings for each dimension across all items;
# dimensions are ordered by their mean Strength (reorder()'s default summary).
df_contextualized_meanings_long %>%
ggplot(aes(x = reorder(Dimension, Strength),
y = Strength)) +
geom_violin() +
geom_jitter(alpha = .1,
width = .1) +
coord_flip() +
labs(y = "Sensorimotor strength",
x = "Dimension") +
theme_bw() +
theme(text = element_text(size=20))
# Pull out the eleven mean-strength columns, renaming on the way so the
# correlation plot shows clean dimension labels. select() renames directly
# (new_name = old_name), avoiding the mutate-then-select pattern that
# listed every column twice.
columns = df_contextualized_meanings %>%
  select(Vision = Vision.M,
         Hearing = Hearing.M,
         Olfaction = Olfaction.M,
         Taste = Taste.M,
         Interoception = Interoception.M,
         Touch = Touch.M,
         Mouth_throat = Mouth_throat.M,
         Head = Head.M,
         Torso = Torso.M,
         Hand_arm = Hand_arm.M,
         Foot_leg = Foot_leg.M)
# Pairwise Pearson correlations between dimensions.
cors = cor(columns)
# cors[lower.tri(cors, diag=TRUE)] <- 0
# Plot the correlation matrix (upper triangle only, original column order)
ggcorrplot(cors,
           hc.order = FALSE,
           # method = "square",
           type = "upper") +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )
# Example item: z-score each dimension within word, then plot the full
# sensorimotor profile of "market" separately for each sentence context.
df_contextualized_meanings_long %>%
  group_by(word, Dimension) %>%
  mutate(Strength_scaled = scale(Strength)) %>%
  filter(word == "market") %>%
  ggplot(aes(x = reorder(Dimension, Strength_scaled),
             y = Strength_scaled,
             fill = Dimension)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  coord_flip() +
  labs(x = "Dimension",
       y = "Sensorimotor Strength (z-scored)") +
  facet_wrap(~sentence) +
  scale_fill_manual(values = viridisLite::viridis(11, option = "mako",
                                                  begin = 0.8, end = 0.15)) +
  theme(text = element_text(size=16)) +
  # guides(fill = FALSE) was deprecated in ggplot2 3.3.4 (it produced the
  # warning shown below); "none" is the supported spelling.
  guides(fill = "none")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Load the item-level means for the sensorimotor norms.
# Reload the contextualized norms, this time with the Lancaster-norm (LS)
# comparison columns precomputed (e.g. distance_to_lancaster).
df_contextualized_meanings = read_csv("../../data/processed/contextualized_sensorimotor_norms_with_ls.csv")
## New names:
## Rows: 448 Columns: 30
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): word, sentence, context dbl (27): ...1, Unnamed: 0, Vision.M, Vision.SD,
## Hearing.M, Hearing.SD, Olfa...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Same item count as before: 448 rows.
nrow(df_contextualized_meanings)
## [1] 448
Load the dominance norms.
# Load the dominance norms (two presentation orders per item: 896 = 2 x 448).
df_dominance = read_csv("../../data/processed/dominance_norms_with_order.csv")
## New names:
## Rows: 896 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): word, version_with_order, ambiguity_type dbl (3): ...1, dominance_right,
## sd_dominance
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## Determine the specific sense/meaning of the righthand context
# NOTE(review): assumes characters 6-9 of version_with_order encode the
# righthand context label — confirm against the norms file's naming scheme.
df_dominance = df_dominance %>%
mutate(context = substr(version_with_order, 6, 9))
## Now group by that righthand context to get relative dominance of that meaning
# Average dominance_right over the two presentation orders, yielding one
# dominance score per word x context (896 rows -> 448).
df_dominance_individual = df_dominance %>%
group_by(word, context) %>%
summarise(dominance = mean(dominance_right))
## `summarise()` has grouped output by 'word'. You can override using the
## `.groups` argument.
nrow(df_dominance_individual)
## [1] 448
Merge the dominance and sensorimotor norms data.
# Merge contextualized sensorimotor norms with per-sense dominance.
# inner_join matches on the shared keys (word, context); the row count
# staying at 448 confirms a one-to-one match.
df_dom_plus_sm = df_contextualized_meanings %>%
inner_join(df_dominance_individual)
## Joining with `by = join_by(word, context)`
nrow(df_dom_plus_sm)
## [1] 448
We also load and merge the Lancaster norms, as a control.
# Load the decontextualized Lancaster sensorimotor norms as a control.
df_lancaster = read_csv("../../data/lexical/lancaster_norms.csv")
## Rows: 39707 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Word, Dominant.perceptual, Dominant.action, Dominant.sensorimotor,...
## dbl (39): Auditory.mean, Gustatory.mean, Haptic.mean, Interoceptive.mean, Ol...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lowercase the word for joining; drop the Lancaster SD columns whose names
# collide with the contextualized norms' .SD columns (would clash on join).
df_lancaster = df_lancaster %>%
mutate(word = tolower(Word)) %>%
select(-Foot_leg.SD, -Torso.SD, -Head.SD, -Hand_arm.SD)
# Word-level join: each word's single Lancaster row is repeated across all
# of its contexts (row count stays at 448).
df_dom_plus_sm = df_dom_plus_sm %>%
inner_join(df_lancaster)
## Joining with `by = join_by(word)`
nrow(df_dom_plus_sm)
## [1] 448
Based on Lynott et al. (2019), we operationalize contextualized sensorimotor strength as the maximum strength across all of the dimensions.
# Following Lynott et al. (2019), operationalize overall sensorimotor
# strength as the maximum mean rating across all eleven dimensions, plus
# separate maxima for the six perceptual modalities and the five action
# effectors. pmax() computes the elementwise (per-row) maximum across
# columns, replacing the much slower rowwise()/max()/ungroup() pattern
# with identical results (NA propagation matches max()'s default).
df_dom_plus_sm = df_dom_plus_sm %>%
  mutate(
    max_strength = pmax(
      ## Modalities
      Vision.M, Hearing.M, Olfaction.M, Touch.M, Taste.M, Interoception.M,
      ## Effectors
      Head.M, Mouth_throat.M, Torso.M, Hand_arm.M, Foot_leg.M
    ),
    max_perceptual_strength = pmax(
      ## Modalities only
      Vision.M, Hearing.M, Olfaction.M, Touch.M, Taste.M, Interoception.M
    ),
    max_action_strength = pmax(
      ## Effectors only
      Head.M, Mouth_throat.M, Torso.M, Hand_arm.M, Foot_leg.M
    )
  )
# Do the contextualized maxima track the Lancaster (decontextualized) maxima?
df_dom_plus_sm %>%
ggplot(aes(x = Max_strength.sensorimotor,
y = max_strength)) +
geom_point(alpha = .5) +
labs(y = "Maximum Contextualized Strength",
x = "Maximum Strength (Lancaster)") +
theme_bw()
# Pearson correlation between the two maxima (r = .44 in the output below).
cor.test(df_dom_plus_sm$Max_strength.sensorimotor,
df_dom_plus_sm$max_strength)
##
## Pearson's product-moment correlation
##
## data: df_dom_plus_sm$Max_strength.sensorimotor and df_dom_plus_sm$max_strength
## t = 10.373, df = 446, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3630485 0.5125743
## sample estimates:
## cor
## 0.440865
The answer is yes: contexts with a higher maximum sensorimotor strength also tend to be rated as more dominant.
Notably, this is true above and beyond the decontextualized ratings of sensorimotor strength for a given word.
# Dominance as a function of contextualized maximum strength.
df_dom_plus_sm %>%
ggplot(aes(x = max_strength,
y = dominance)) +
geom_point(alpha = .4) +
geom_smooth(method = "lm") +
labs(x = "Maximum Contextualized Strength",
y = "Dominance") +
theme_minimal() +
theme(text = element_text(size=16))
## `geom_smooth()` using formula = 'y ~ x'
# Full model: contextualized max strength plus the decontextualized
# Lancaster covariates, with a by-word random intercept.
model_full = lmer(data = df_dom_plus_sm,
dominance ~
max_strength +
Max_strength.sensorimotor + Minkowski3.sensorimotor +
(1 | word),
REML = FALSE)
## boundary (singular) fit: see help('isSingular')
# Reduced model: same covariates without max_strength, for the nested
# likelihood-ratio comparison below.
model_reduced = lmer(data = df_dom_plus_sm,
dominance ~
# max_strength +
Max_strength.sensorimotor + Minkowski3.sensorimotor +
(1 | word),
REML = FALSE)
## boundary (singular) fit: see help('isSingular')
# NOTE(review): both fits are singular (the word-intercept variance is
# estimated at ~0 below), so the random effect is uninformative here.
summary(model_full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula:
## dominance ~ max_strength + Max_strength.sensorimotor + Minkowski3.sensorimotor +
## (1 | word)
## Data: df_dom_plus_sm
##
## AIC BIC logLik -2*log(L) df.resid
## 1070.7 1095.3 -529.4 1058.7 442
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.30770 -0.76937 -0.01941 0.78910 2.13757
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.0000 0.0000
## Residual 0.6221 0.7887
## Number of obs: 448, groups: word, 112
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.47619 0.24549 448.00000 -1.940 0.0530 .
## max_strength 0.25833 0.05964 448.00000 4.332 1.83e-05 ***
## Max_strength.sensorimotor 0.02905 0.10851 448.00000 0.268 0.7891
## Minkowski3.sensorimotor -0.13254 0.07421 448.00000 -1.786 0.0748 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) mx_str Mx_st.
## max_strngth -0.366
## Mx_strngth. -0.430 -0.070
## Mnkwsk3.sns 0.032 -0.244 -0.789
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Likelihood-ratio test: does adding max_strength improve model fit?
anova(model_full, model_reduced)
## Data: df_dom_plus_sm
## Models:
## model_reduced: dominance ~ Max_strength.sensorimotor + Minkowski3.sensorimotor + (1 | word)
## model_full: dominance ~ max_strength + Max_strength.sensorimotor + Minkowski3.sensorimotor + (1 | word)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## model_reduced 5 1087.1 1107.6 -538.54 1077.1
## model_full 6 1070.7 1095.3 -529.35 1058.7 18.381 1 1.808e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residualize dominance on the reduced model, then check whether
# contextualized max strength still explains the leftover variance.
df_dom_plus_sm %>%
mutate(resid = residuals(model_reduced)) %>%
ggplot(aes(x = max_strength,
y = resid)) +
geom_point(alpha = .4) +
geom_smooth(method = "lm") +
labs(x = "Maximum Contextualized Strength",
y = "Residuals (Reduced model)") +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
Here, we ask whether specific dimensions are particularly correlated with sense dominance.
# Which individual sensorimotor dimensions predict sense dominance over and
# above the word's decontextualized (Lancaster) maximum strength?
features <- c("Vision.M", "Hearing.M", "Olfaction.M", "Touch.M", "Taste.M",
              "Interoception.M", "Head.M", "Mouth_throat.M", "Torso.M",
              "Hand_arm.M", "Foot_leg.M")
# Fit one linear model per feature: dominance ~ baseline + feature.
r2_results <- map_dfr(features, function(feat) {
  # reformulate() assembles the formula from term strings; the local is
  # named `fml` rather than `formula` to avoid shadowing stats::formula().
  fml <- reformulate(c("Max_strength.sensorimotor", feat),
                     response = "dominance")
  model <- lm(fml, data = df_dom_plus_sm)
  smry <- summary(model)  # computed once, reused for R2 and p-value
  tibble(
    feature = feat,
    R2 = smry$r.squared,
    # Terms are (Intercept), baseline, feature — the feature is 3rd.
    beta = coef(model)[3],
    p_value = smry$coefficients[3, 4]
  )
})
# Baseline model (just Max_strength.sensorimotor)
baseline_model <- lm(dominance ~ Max_strength.sensorimotor, data = df_dom_plus_sm)
baseline_row <- tibble(
  feature = "Baseline",
  R2 = summary(baseline_model)$r.squared,
  beta = NA,
  p_value = NA
)
# Combine
r2_results_with_baseline <- bind_rows(baseline_row, r2_results)
# Plot: R^2 per feature with the baseline highlighted; "*" marks p < .05
# (the baseline's NA label is skipped via na.rm in geom_text).
r2_results_with_baseline %>%
  mutate(feature = str_remove(feature, "\\.M$"),
         feature = str_replace(feature, "_", "/"),
         sig = ifelse(p_value < .05, "*", ""),
         is_baseline = feature == "Baseline",
         feature = fct_reorder(feature, R2)) %>%
  ggplot(aes(x = R2, y = feature, fill = is_baseline)) +
  geom_col() +
  geom_text(aes(label = sig), hjust = -0.5, size = 6, na.rm = TRUE) +
  scale_fill_manual(values = c("grey40", "steelblue"), guide = "none") +
  labs(x = "R²", y = NULL, title = "Predicting Sense Dominance") +
  theme_minimal() +
  theme(text = element_text(size = 16))
# Zero-order correlations with dominance for the two strongest dimensions.
cor.test(df_dom_plus_sm$Touch.M,
df_dom_plus_sm$dominance)
##
## Pearson's product-moment correlation
##
## data: df_dom_plus_sm$Touch.M and df_dom_plus_sm$dominance
## t = 3.5374, df = 446, p-value = 0.0004464
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.07368382 0.25395880
## sample estimates:
## cor
## 0.1652009
cor.test(df_dom_plus_sm$Vision.M,
df_dom_plus_sm$dominance)
##
## Pearson's product-moment correlation
##
## data: df_dom_plus_sm$Vision.M and df_dom_plus_sm$dominance
## t = 4.0423, df = 446, p-value = 6.235e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09703963 0.27583493
## sample estimates:
## cor
## 0.1879943
It looks like vision and touch are especially strong predictors. Which items drive this, e.g., for touch?
# Collapse to one row per word x sense; the sense label ("M1"/"M2") is
# extracted from the context string — assumes every context contains it.
by_sense <- df_dom_plus_sm %>%
mutate(sense = str_extract(context, "M[12]")) %>%
group_by(word, sense) %>%
summarize(
mean_touch = mean(Touch.M),
mean_vision = mean(Vision.M),
mean_dom = mean(dominance),
.groups = "drop"
)
# Now 2 rows per word—compute within-word difference
sense_diffs <- by_sense %>%
pivot_wider(names_from = sense,
values_from = c(mean_touch, mean_vision, mean_dom)) %>%
mutate(
touch_diff = mean_touch_M1 - mean_touch_M2,
vision_diff = mean_vision_M1 - mean_vision_M2,
dom_diff = mean_dom_M1 - mean_dom_M2
)
# Words with the largest within-word Touch differences, alongside their
# dominance differences (signed, M1 minus M2).
sense_diffs %>%
mutate(abs_touch_diff = abs(touch_diff)) %>%
arrange(desc(abs_touch_diff)) %>%
select(word, abs_touch_diff, dom_diff) %>%
head(5)
## # A tibble: 5 × 3
## word abs_touch_diff dom_diff
## <chr> <dbl> <dbl>
## 1 punch 2.87 -1.54
## 2 spill 2.49 2.44
## 3 racket 2.48 -1.92
## 4 clip 2.01 0.102
## 5 case 1.99 -0.894
# Words with the largest within-word Vision differences.
sense_diffs %>%
mutate(abs_vision_diff = abs(vision_diff)) %>%
arrange(desc(abs_vision_diff)) %>%
select(word, abs_vision_diff, dom_diff) %>%
head(5)
## # A tibble: 5 × 3
## word abs_vision_diff dom_diff
## <chr> <dbl> <dbl>
## 1 break 2.02 1.30
## 2 block 2.02 0.810
## 3 spill 1.96 2.44
## 4 market 1.80 0.941
## 5 drain 1.73 -2.08
# Does the sense with higher Touch or Vision tend to be more dominant?
# Correlate the within-word differences (n = 112 words).
cor.test(sense_diffs$touch_diff, sense_diffs$dom_diff)
##
## Pearson's product-moment correlation
##
## data: sense_diffs$touch_diff and sense_diffs$dom_diff
## t = 2.8459, df = 110, p-value = 0.005284
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.08021574 0.42669538
## sample estimates:
## cor
## 0.2618737
# Same question for Vision.
cor.test(sense_diffs$vision_diff, sense_diffs$dom_diff)
##
## Pearson's product-moment correlation
##
## data: sense_diffs$vision_diff and sense_diffs$dom_diff
## t = 4.4908, df = 110, p-value = 1.758e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2244579 0.5397527
## sample estimates:
## cor
## 0.3936196
Now, we compare each dimension to the LS Norms.
# Per-dimension deviation of each contextualized rating from the matching
# Lancaster (decontextualized) mean, reshaped to long format.
df_diffs = df_dom_plus_sm %>%
mutate(vision_diff = (Vision.M - Visual.mean),
auditory_diff = (Hearing.M - Auditory.mean),
intero_diff = (Interoception.M - Interoceptive.mean),
olfactory_diff = (Olfaction.M - Olfactory.mean),
touch_diff = (Touch.M - Haptic.mean),
taste_diff = (Taste.M - Gustatory.mean),
torso_diff = (Torso.M - Torso.mean),
hand_arm_diff = (Hand_arm.M - Hand_arm.mean),
foot_leg_diff = (Foot_leg.M - Foot_leg.mean),
head_diff = (Head.M - Head.mean),
mouth_throat_diff = (Mouth_throat.M - Mouth.mean)) %>%
pivot_longer(cols = c(vision_diff,
auditory_diff,
intero_diff,
olfactory_diff,
touch_diff,
taste_diff,
torso_diff,
hand_arm_diff,
foot_leg_diff,
head_diff,
mouth_throat_diff),
names_to = "Dimension",
values_to = "Diff") %>%
mutate(Dimension = gsub('_diff', '', Dimension)) %>%
# Map the lowercase diff names back to the dimension labels used elsewhere.
# NOTE(review): str_to_title() treats "_" as a word boundary, so
# "hand_arm" likely becomes "Hand_Arm" rather than the "Hand_arm" level
# declared below — verify these labels actually match the factor levels.
mutate(Dimension = case_when(
Dimension == "intero" ~ "Interoception",
Dimension == "auditory" ~ "Hearing",
Dimension == "olfactory" ~ "Olfaction",
TRUE ~ str_to_title(Dimension)
))
# Fix the display order of dimensions (reversed so that after coord_flip()
# Vision appears at the top of the axis).
df_diffs$Dimension = factor(df_diffs$Dimension,
levels = rev(c(
'Vision',
'Hearing',
'Olfaction',
'Taste',
'Interoception',
'Touch',
'Mouth_throat',
'Head',
'Torso',
'Hand_arm',
'Foot_leg'
)))
# Deviation-from-Lancaster profile for "market" in each sentence context.
df_diffs %>%
  filter(word == "market") %>%
  ggplot(aes(x = Dimension,
             y = Diff,
             fill = Dimension)) +
  geom_bar(stat = "summary") +
  # The zero-deviation reference line lies on the Diff (y) axis; under
  # coord_flip() it renders as a vertical line at 0. The original
  # geom_vline(xintercept = 0) targeted the discrete Dimension axis and
  # drew nothing meaningful.
  geom_hline(yintercept = 0, linetype = "dotted") +
  theme_bw() +
  coord_flip() +
  labs(x = "Dimension",
       y = "Deviation from Lancaster Norms") +
  scale_fill_manual(values = viridisLite::viridis(11, option = "mako",
                                                  begin = 0.8, end = 0.15)) +
  facet_wrap(~sentence) +
  theme(text = element_text(size=16)) +
  # "none" replaces the deprecated guides(fill = FALSE)
  guides(fill = "none")
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
We also look at this across all words:
# Deviations from the Lancaster norms across all words, by dimension;
# the dotted line marks zero deviation.
df_diffs %>%
ggplot(aes(x = reorder(Dimension, Diff),
y = Diff)) +
geom_violin() +
geom_jitter(alpha = .1,
width = .1) +
geom_hline(yintercept = 0, linetype = "dotted") +
theme_bw() +
coord_flip() +
labs(x = "Dimension",
y = "Deviation from Lancaster Norms") +
theme(text = element_text(size=16))
This question can in turn be decomposed into two questions:
First, are more dominant senses closer to the LS Norms overall? We might expect this to be the case if the LS Norms reflect the dominant sense; that is, when people rate the sensorimotor properties of a decontextualized word, they might be more likely to index properties associated with the most dominant contexts or meanings of that word.
And the answer is yes: more dominant senses are indeed more similar (less distant) from the Lancaster norm in terms of their sensorimotor profile.
# Is a sense's sensorimotor profile closer to the Lancaster norm when that
# sense is more dominant? Compare models with and without dominance.
model_with_dominance = lmer(data = df_dom_plus_sm,
distance_to_lancaster ~ dominance + (1 | word),
REML = FALSE)
model_no_dominance = lmer(data = df_dom_plus_sm,
distance_to_lancaster ~ (1 | word),
REML = FALSE)
summary(model_with_dominance)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: distance_to_lancaster ~ dominance + (1 | word)
## Data: df_dom_plus_sm
##
## AIC BIC logLik -2*log(L) df.resid
## -1305.6 -1289.2 656.8 -1313.6 444
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8511 -0.5791 -0.2326 0.4193 4.0935
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.000806 0.02839
## Residual 0.002542 0.05042
## Number of obs: 448, groups: word, 112
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.077717 0.003605 114.138534 21.56 < 2e-16 ***
## dominance -0.009957 0.002981 344.678498 -3.34 0.000929 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## dominance 0.099
# Likelihood-ratio test for the dominance term.
anova(model_with_dominance, model_no_dominance)
## Data: df_dom_plus_sm
## Models:
## model_no_dominance: distance_to_lancaster ~ (1 | word)
## model_with_dominance: distance_to_lancaster ~ dominance + (1 | word)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## model_no_dominance 3 -1296.7 -1284.4 651.35 -1302.7
## model_with_dominance 4 -1305.6 -1289.2 656.82 -1313.6 10.934 1 0.0009444
##
## model_no_dominance
## model_with_dominance ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Visualize: distance to the Lancaster norm as a function of dominance.
df_dom_plus_sm %>%
ggplot(aes(x = dominance,
y = distance_to_lancaster)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
labs(x = "Dominance",
y = "Cosine Distance to Decontextualized LS Norm") +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
# Simple correlation (ignores the by-word grouping handled in the model above).
cor.test(df_dom_plus_sm$dominance, df_dom_plus_sm$distance_to_lancaster)
##
## Pearson's product-moment correlation
##
## data: df_dom_plus_sm$dominance and df_dom_plus_sm$distance_to_lancaster
## t = -2.7295, df = 446, p-value = 0.006594
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.21823310 -0.03596218
## sample estimates:
## cor
## -0.1281799
And second: does dominance predict the direction of difference?
The earlier analysis of dominance suggests that more dominant senses are more concrete than less dominant senses. Thus, we might expect that more dominant senses are also more concrete on average than the decontextualized norms.
We find that this is true: that is, more dominant senses are more concrete on average than the LS norm.
# Average the signed deviations across dimensions per item: positive means
# the contextualized ratings exceed the Lancaster norm on average.
df_diffs_avg = df_diffs %>%
group_by(word, sentence, context) %>%
summarise(mean_diff = mean(Diff))
## `summarise()` has grouped output by 'word', 'sentence'. You can override using
## the `.groups` argument.
# Reattach dominance (and the other item-level variables).
df_diffs_avg = df_diffs_avg %>%
left_join(df_dom_plus_sm)
## Joining with `by = join_by(word, sentence, context)`
# Does dominance predict the direction of deviation?
# NOTE(review): these reuse (overwrite) the model names from the previous
# distance_to_lancaster analysis.
model_with_dominance = lmer(data = df_diffs_avg,
mean_diff ~ dominance + (1 | word),
REML = FALSE)
model_no_dominance = lmer(data = df_diffs_avg,
mean_diff ~ (1 | word),
REML = FALSE)
summary(model_with_dominance)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mean_diff ~ dominance + (1 | word)
## Data: df_diffs_avg
##
## AIC BIC logLik -2*log(L) df.resid
## 114.1 130.5 -53.1 106.1 444
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8348 -0.5528 -0.0351 0.5782 2.8769
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.04174 0.2043
## Residual 0.05175 0.2275
## Number of obs: 448, groups: word, 112
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.659e-03 2.216e-02 1.132e+02 0.436 0.664
## dominance 8.583e-02 1.349e-02 3.407e+02 6.361 6.44e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## dominance 0.073
# Likelihood-ratio test for dominance predicting mean deviation.
anova(model_with_dominance, model_no_dominance)
## Data: df_diffs_avg
## Models:
## model_no_dominance: mean_diff ~ (1 | word)
## model_with_dominance: mean_diff ~ dominance + (1 | word)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## model_no_dominance 3 150.36 162.67 -72.179 144.36
## model_with_dominance 4 114.12 130.54 -53.062 106.12 38.235 1 6.271e-10
##
## model_no_dominance
## model_with_dominance ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simple correlation between dominance and mean deviation.
cor.test(df_diffs_avg$dominance, df_diffs_avg$mean_diff)
##
## Pearson's product-moment correlation
##
## data: df_diffs_avg$dominance and df_diffs_avg$mean_diff
## t = 4.7696, df = 446, p-value = 2.504e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1303131 0.3066838
## sample estimates:
## cor
## 0.2202983
# Load sentence pairs with baseline distances, keeping only rows where
# both distance measures are available.
df_with_baseline = read_csv("../../data/processed/sentence_pairs_with_baseline.csv") %>%
drop_na(sensorimotor_distance) %>%
drop_na(baseline_distance)
## Rows: 672 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): word, sentence1, sentence2, ambiguity_type, disambiguating_word1, ...
## dbl (12): mean_relatedness, median_relatedness, diff, count, sd_relatedness,...
## lgl (1): same
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 576 of the 672 loaded pairs remain after dropping missing distances.
nrow(df_with_baseline)
## [1] 576